/****************************************************************************************************/
** Overview:                                                                                        */
** First we use World Bank's WDI data base                                                          */
** Then we use the OECD's estimates 																*/
** Then we use Penn World Tables, the most recent (as of 12/31/2012)                                */
** version of which is 7.1                                                                          */
** Then we use IMF data (last updated October, 2012),                                               */
** and BLS data to clean up east/west Germany.                                                      */
** Then we turn to Maddison's data, then the CIA Factbook,                                          */
** and finally interpolation.                                                                       */
**                                                                                                  */
** The procedure works as follows. We take base GDP data                                            */
** from the World Bank. Then, for each other data set, and each country-year,                       */
** we figure out the growth rate of GDP from the previous year to                                   */
** that year in the other data set. If GDP is missing in that year but not                          */
** the previous year, we fill in GDP by growing last year's GDP by                                  */
** the estimated growth rate. For example, suppose GDP is 10000                                     */
** in 1990 in France in the WDI data, but missing in 1991.                                          */
** In the PWT data, we estimate GDP of 9000 in 1990 and 9180                                        */
** in 1991. Then we fill in GDP in France in 1991 as 10200 (9180/9000*10000).                       */
** We also repeat the procedure going backwards (so that if GDP is missing                          */
** in 1990 but not 1991, we can fill in the 1990 value). This procedure                             */            
** gets the level of GDP right, pegged to the base year (2000 US dollars) of the WDI                */
** GDP data.                                                                                        */
**                                                                                                  */
** If a country is missing GDP data entirely (rather than missing specific                          */
** years), then for that country (for a given year), we take the ratio of GDP per capita            */
** in that country to GDP per capita in the US, using the PWT/IMF/CIA                               */
** data. Then we multiply the WDI's US GDP per capita by the computed                               */
** ratio, and take the product as the value of GDP for the new country.                             */
/****************************************************************************************************/


* Format the WDI GDP, unemployment and inflation data

/************************************************************************************************/
* I format the WDI data here. Our starting point is the raw WDI_GDP data file.
* Each row is a country-indicator (country-gdp, country-inflation, etc.).  
* The first row is column names. The first four columns are country name, country code, indicator name, and indicator code. 
* The remaining columns are the values of the indicators for 1960 through 2012.
* To get the data into a usable format, I go indicator by indicator, reshaping the data and saving a separate data set. I then merge the four datasets together to make a single wdi data set. 
* Note that GDP data only begin in 1980 and are in 2005 international $.
* Data downloaded from: http://data.worldbank.org/data-catalog/world-development-indicators on 12/31/2012
/************************************************************************************************/

* -tomode- is a user-written command that this script calls in the BLS and Germany steps.
* Only contact the SSC archive when the command is not already installed, so that the
* script can be re-run without network access.
capture which tomode
if _rc {
	ssc install tomode
}

	/* Read in the raw file; the first data row holds the original column headers */
	insheet using "$directory/gdp/Raw/WDI/WDI_Data.csv", clear
	foreach v of varlist _all {
		local name=`v'[1]
		local vartype: type `v'
		/* Numeric columns have a number as their header, which is not a legal variable
		   name, so they get an n_ prefix. The test is "anything that is not a string"
		   rather than a list of storage types: a list of int/long/double misses columns
		   that insheet stores as byte or float, and those would then fail in -rename-. */
		if substr("`vartype'",1,3)!="str" {
			local name="n_"+"`name'"
		}
		else {
			local name=lower("`name'")
			local name=subinstr("`name'"," ","_",10)
		}
		rename `v' `name'
	}
	/* the header row has done its job */
	drop if _n==1

	/* Clean the dataset: keep the four indicators we use and standardize country codes */
	replace indicator_code=subinstr(indicator_code," ","",99)
	keep if indicator_code=="NY.GDP.PCAP.PP.KD" | indicator_code=="FP.CPI.TOTL.ZG" | indicator_code=="NY.GDP.DEFL.KD.ZG" | indicator_code=="SL.UEM.TOTL.ZS"
	rename country_code cty
	replace cty="ROM" if cty=="ROU"
	rename indicator_name indicator
	compress

	/* Give each indicator a short name; it becomes the variable name after the reshape */
	replace indicator_code="gdp"          if indicator_code=="NY.GDP.PCAP.PP.KD"
	replace indicator_code="inflation"    if indicator_code=="FP.CPI.TOTL.ZG"
	replace indicator_code="deflator"     if indicator_code=="NY.GDP.DEFL.KD.ZG"
	replace indicator_code="unemployment" if indicator_code=="SL.UEM.TOTL.ZS"

	/* One pass per indicator: rename n_<year> to <indicator><year>, reshape to
	   country-year rows, and save a temporary file for that indicator */
	levelsof indicator_code, local(inds)
	foreach ind of local inds {
		preserve
		keep if indicator_code=="`ind'"
		keep cty n_*
		foreach v of varlist n_* {
			local y = subinstr("`v'", "n_", "", 1)
			rename `v' `ind'`y'
		}
		reshape long `ind', i(cty) j(year)
		save "$directory/gdp/Processed/wdi_`ind'", replace
		restore
	}

	/* Stitch the per-indicator files back together and delete the temporary files */
	use "$directory/gdp/Processed/wdi_gdp", clear
	foreach indicator in deflator inflation unemployment {
		merge cty year using "$directory/gdp/Processed/wdi_`indicator'", sort
		drop _merge
		rm "$directory/gdp/Processed/wdi_`indicator'.dta"
	}
	rm "$directory/gdp/Processed/wdi_gdp.dta"

	/* rescale inflation and unemployment from percent to fractions */
	replace inflation = inflation/100
	replace unemployment=unemployment/100

	/* There's no documentation on this, but inflation of -100 (-1 after rescaling) has to indicate missing */
	replace inflation = . if inflation == -1

	/* make a dataset-specific variable */
	gen gdp_wdi = gdp
	save "$directory/gdp/Processed/wdi", replace

* Format the OECD data, downloaded from http://stats.oecd.org/# on 12/31/2012. (Note Alan Heston's advice to prefer the OECD data to the PWT's)
	insheet using "$directory/gdp/Raw/OECD/SNA_TABLE1_Data_48ffdfba-10a4-4892-be82-1f729f66545b.csv", clear

	/* Map OECD country names to the three-letter codes used everywhere else in this file. */
	/* Rows whose name is not listed here keep an empty code and are dropped below.        */
	gen str3 cty=""
	replace cty="AUS" if country=="Australia"
	replace cty="AUT" if country=="Austria"
	replace cty="BEL" if country=="Belgium"
	replace cty="CAN" if country=="Canada"
	replace cty="CHL" if country=="Chile"
	replace cty="CHN" if country=="China"
	replace cty="CZE" if country=="Czech Republic"
	replace cty="DNK" if country=="Denmark"
	replace cty="EST" if country=="Estonia"
	replace cty="FIN" if country=="Finland"
	replace cty="FRA" if country=="France"
	replace cty="DEU" if country=="Germany"
	replace cty="GRC" if country=="Greece"
	replace cty="HUN" if country=="Hungary"
	replace cty="ISL" if country=="Iceland"
	replace cty="IDN" if country=="Indonesia"
	replace cty="IRL" if country=="Ireland"
	replace cty="ISR" if country=="Israel"
	replace cty="ITA" if country=="Italy"
	replace cty="JPN" if country=="Japan"
	replace cty="KOR" if country=="Korea"
	replace cty="LUX" if country=="Luxembourg"
	replace cty="MEX" if country=="Mexico"
	replace cty="NLD" if country=="Netherlands"
	replace cty="NZL" if country=="New Zealand"
	replace cty="NOR" if country=="Norway"
	replace cty="POL" if country=="Poland"
	replace cty="PRT" if country=="Portugal"
	replace cty="RUS" if country=="Russian Federation"
	replace cty="SVK" if country=="Slovak Republic"
	replace cty="SVN" if country=="Slovenia"
	replace cty="ZAF" if country=="South Africa"
	replace cty="ESP" if country=="Spain"
	replace cty="SWE" if country=="Sweden"
	replace cty="CHE" if country=="Switzerland"
	replace cty="TUR" if country=="Turkey"
	replace cty="GBR" if country=="United Kingdom"
	replace cty="USA" if country=="United States"
	drop if cty==""

	/* Drop descriptor columns we do not use, and rename the rest with an _oecd suffix */
	drop transaction measure frequency
	rename flags flags_oecd
	rename time year
	sort cty year
	rename value gdp_oecd
	/* label corrected: the original read "GDP per heard" */
	la var gdp_oecd "GDP per head, US $, constant prices, constant PPPs, OECD base year"
	save "$directory/gdp/Processed/oecd", replace
	
* Format the Penn World Tables, version 7.1
* Note: raw data from the PWT website, but I delete the columns we don't use
* All data are in 2005 international dollars
* link to zipped excel file: http://pwt.econ.upenn.edu/Downloads/pwt71/pwt71_11302012version.zip
* rgdpch is the chained real PPP GDP series; ppp is the price level relative to the US
* (it is adjusted using US inflation further below)
    insheet using "$directory/gdp/Raw/PWT/pwt71_wo_country_names_wo_g_vars.csv", comma clear
    keep isocode year pop ppp p rgdpch

    /* country codes: rename the identifier, then bring Romania and Germany in line */
    /* with the codes used in the other sources, and drop the CH2 rows              */
    rename isocode cty
    replace cty = "ROM" if cty == "ROU"
    replace cty = "DEU" if cty == "GER"
    drop if cty == "CH2"

    /* note that this price level is relative to US in each year; also it needs to be adjusted for XR movements */
    rename ppp priceLevel_pwt

    /* eyeball the US series */
    list year rgdpch if cty=="USA" & year >=1960, noobs sep(0)

    /* source-specific copy of the GDP series */
    generate gdp_pwt = rgdpch

    save "$directory/gdp/Processed/pwt", replace

/************************************************************************************************/
* Format the IMF World Economic Outlook data (October 2012 release). The layout resembles the
* WDI file: one row per country-indicator, one column per year.
* Downloaded on 10/14/11, from: http://www.imf.org/external/pubs/ft/weo/2012/02/weodata/WEOOct2012all.xls
* NOTE(review): that download date predates the October 2012 release named above - confirm.
* The IMF GDP-at-PPP series is nominal, so it is used only in 2005 and carried to the other
* years with the growth of real GDP per capita in local currency units.
/************************************************************************************************/

    insheet using "$directory/gdp/Raw/IMF/WEOOct2012all.csv", clear
    rename country countryname
    rename iso cty
    replace cty="ROM" if cty=="ROU"
    replace cty="UNK" if cty=="UVK"
    keep if inlist(weosubject, "PPPPC", "NGDPRPC", "PCPIPCH", "LUR")
    rename subjectdescriptor indicator

    /* Keep the identifying columns plus the year columns (v*), and give each */
    /* subject code the short name that becomes its variable name below       */
    keep countryname cty weosubject indicator v* estimatesstartafter
    replace weosubject="ngdp_ppp"     if weosubject=="PPPPC"
    replace weosubject="rgdp_lcu"     if weosubject=="NGDPRPC"
    replace weosubject="inflation"    if weosubject=="PCPIPCH"
    replace weosubject="unemployment" if weosubject=="LUR"

    /* One pass per indicator: convert the year columns to numeric, rename them to */
    /* <indicator><year> using the year stored in each variable label, reshape to  */
    /* country-year rows, and save a temporary file                                */
    levelsof weosubject, local(inds)
    foreach ind of local inds {
        preserve
        keep if weosubject=="`ind'"
        foreach v of varlist v* {
            /* NOTE(review): confirm this ignore() specification strips both "--" and "n/a" as intended */
            destring `v', replace ignore("--","n/a")
            local y: variable label `v'
            rename `v' `ind'`y'
        }
        reshape long `ind', i(cty) j(year)
        save "$directory/gdp/imf_`ind'", replace
        restore
    }

    /* Merge the per-indicator files back together and remove the temporary files */
    use "$directory/gdp/imf_ngdp_ppp", clear
    foreach indicator in rgdp_lcu inflation unemployment {
        merge cty year using "$directory/gdp/imf_`indicator'", sort
        tabulate _merge /* note that _merge is often not equal to 3 because of missing data */
        drop _merge
        rm "$directory/gdp/imf_`indicator'.dta"
    }
    rm "$directory/gdp/imf_ngdp_ppp.dta"

    /* Real GDP at 2005 PPP: anchor on the 2005 nominal PPP value, then chain forward */
    /* and backward in time with the growth of real GDP per capita in local currency  */
    generate gdp=ngdp_ppp if year==2005
    bysort cty (year): replace gdp=gdp[_n-1]*rgdp_lcu/rgdp_lcu[_n-1] if gdp==.
    gsort cty -year
    by cty: replace gdp=gdp[_n-1]*rgdp_lcu/rgdp_lcu[_n-1] if gdp==.
    drop ngdp_ppp rgdp_lcu

    /* Rescale inflation and unemployment from percent to fractions, tag with the source */
    sort cty year
    replace inflation = inflation/100
    replace unemployment = unemployment/100
    rename gdp          gdp_imf
    rename inflation    inflation_imf
    rename unemployment unemployment_imf
    save "$directory/gdp/Processed/imfNew", replace

* Format the CIA World Factbook data
    import excel using "$directory/gdp/Raw/CIA/CIA World Factbook.xlsx", sheet("stata") firstrow clear

    /* bring country codes in line with the other sources */
    replace cty = "ROM" if cty == "ROU"
    replace cty = "MMR" if cty == "BUR"

    /* when a country-year appears in several factbook editions, keep the latest edition */
    bysort cty year: egen latest = max(year_factbook)
    drop if year_factbook != latest

    /* Gaza Strip and West Bank are relabelled as Palestine; rows that then share */
    /* a code, name and year are averaged                                         */
    rename gdp gdp_cia
    replace country="Palestine" if country=="Gaza Strip" | country=="West Bank"
    collapse (mean) gdp_cia, by(cty country year)

    /* discard rows without a year, the world aggregate and the EU */
    drop if missing(year) | country=="World" | cty=="EU"

    /* the file must be unique (and sorted) by country-year for the merge later on */
    bysort cty year: assert _n==1
    save "$directory/gdp/Processed/ciaAll", replace

* Format Angus Maddison's data.  Source: http://www.ggdc.net/MADDISON/Historical_Statistics/horizontal-file_02-2010.xls "Statistics on World Population, GDP and Per Capita GDP, 1-2008 AD"
    insheet using "$directory/gdp/Raw/Maddison/maddison.csv", comma clear

    /* bring country codes in line with the other sources */
    replace cty = "ROM" if cty == "ROU"
    replace cty = "YUG" if cty == "YUGO"
    replace cty = "MMR" if cty == "BUR"
    replace cty = "DEU" if cty == "GER"
    compress

    /* Some year columns come in as text (thousands separators), so convert all of */
    /* them to strings, reshape to country-year rows, and only then clean the text */
    quietly tostring gdp*, replace
    reshape long gdp, i(cty) j(year)
    replace gdp = subinstr(gdp, ",", "", .)
    replace gdp = "" if gdp == "."

    /* every remaining non-empty entry must be a valid number before converting back */
    assert !missing(real(gdp)) if !missing(gdp)
    destring gdp, replace force
    rename gdp gdp_maddison

    /* only observations from 1940 onward that actually have a value are kept */
    drop if year < 1940 | missing(gdp_maddison)
    save "$directory/gdp/Processed/maddison", replace

* Format the ILO unemployment data: Downloaded from http://laborsta.ilo.org/STP/guest on 12/31/2012 
* Result: one row per country-year with unemployment_ilo (as a fraction), taken from the
* highest-priority source type that has a value, plus empty gdp/inflation/priceLevel placeholders.
    insheet using "$directory/gdp/Raw/ILO/laborsta3A.csv", comma clear
	label data "Downloaded from http://laborsta.ilo.org/STP/guest on 12/31/2012" 
    /* keep only the rows for the total rate */
    keep if sex == "Rates, total"
    quietly do "$directory/gdp/Raw/ILO/iloXwalk.do" /* make a cty code */
    /* every remaining row must have received a country code from the crosswalk */
	assert !missing(cty)

    /* standardize Romania's country code */
    replace cty = "ROM" if cty == "ROU"
    /********************************************************************/
    /* There are several kinds of sources ILO uses for its information: */
    /* The main 4 are population censuses, employment office records,   */
    /* labour force surveys, and official estimates. When multiple      */
    /* sources are available, I prioritize in the listed order. There   */
    /* are a few other sources: administrative reports, insurance       */
    /* records, household surveys, and household income/expenditure     */
    /* surveys. I prioritize in that order, as well.                    */
    /********************************************************************/
    
    assert !missing(source)
    
    /* Priority 1 is the most preferred source type. The replaces run in order, so a  */
    /* source string matching more than one pattern ends up with the last match.      */
    gen     priority = 1 if strpos(source, "Population census")
    replace priority = 2 if strpos(source, "Employment office records")
    replace priority = 3 if strpos(source, "Labour force survey")
    replace priority = 4 if strpos(source, "Official estimates")
    replace priority = 5 if strpos(source, "Administrative reports")
    replace priority = 6 if strpos(source, "Insurance records")
    replace priority = 7 if strpos(source, "Household survey")
    replace priority = 8 if strpos(source, "Household income/expenditure survey")
    assert !missing(priority)
    /* Remember the range of priorities present; it drives the rename loop below.     */
    /* NOTE(review): the range is measured BEFORE the drop on the next-but-three line, */
    /* so the rename loop assumes every priority in the range survives that drop.     */
	summ priority
	local minpriority=r(min)
	local maxpriority=r(max)
    /* NOTE(review): this removes every row whose source_id is non-missing - confirm that is intended */
    drop if source_id != .
    
    /* average the year columns (d*) within each country and source priority */
    collapse (mean) d*, by(priority cty)
    
    /* add a trailing underscore so that reshape wide yields d<year>_<priority> */
    foreach var of varlist d*{
        rename `var' `var'_
    }
    keep d* cty priority
    reshape wide d*, i(cty) j(priority)

    /* Swap the name parts to unemployment_<priority>_<year> so the year can become   */
    /* the long dimension. The year range 1969-2008 is hard-coded here.               */
    forvalues year = 1969/2008{
        forvalues p = `minpriority'/`maxpriority' {
            rename d`year'_`p' unemployment_`p'_`year'
        }
    }
    
    /* Back to country-year rows, one column per priority. The stub list hard-codes   */
    /* priorities 1 through 8.                                                        */
    #delimit ;
    reshape long    unemployment_1_ unemployment_2_ unemployment_3_ unemployment_4_ 
                    unemployment_5_ unemployment_6_ unemployment_7_ unemployment_8_,
                    i(cty) j(year);
    #delimit cr
    
    /* Take the value from the first priority that has one. The names unemployment_1  */
    /* and unemployment_`p' below are abbreviations of the reshaped variables, which  */
    /* carry a trailing underscore (unemployment_1_, ...).                            */
    gen unemployment_ilo = unemployment_1
    forvalues p = `minpriority' / `maxpriority' {
        replace unemployment_ilo = unemployment_`p' if missing(unemployment_ilo)
    }
    keep cty year unemployment_ilo
    /* Empty placeholders: the fill-in loop later in this file refers to inflation_ilo */
    gen gdp_ilo = .
    gen inflation_ilo = .
    gen priceLevel_ilo = .
    sort cty year
    /* rescale from percent to a fraction */
	replace unemployment_ilo=unemployment_ilo/100
    /* eyeball the US series and the overall distribution */
    list unemployment_ilo year if cty == "USA"
    sum unemployment_ilo, det
    save "$directory/gdp/Processed/ilo", replace

* Format the East/West Germany unemployment, inflation and price-level data
    insheet using "$directory/gdp/Raw/germanyUnemployment.csv", comma clear

    /* tag every series with a _germany suffix */
    rename inflation    inflation_germany
    rename unemployment unemployment_germany
    rename pricelevel   priceLevel_germany

    /* unemployment goes from percent to a fraction, as in the other sources */
    replace unemployment_germany = unemployment_germany/100

    save "$directory/gdp/Processed/germanyUnemploymentInflation", replace
    
/****************************************************************************************************/
/*                                                                                                  */
/*      Actually assemble the GDP/indicators data set now                                           */
/*                                                                                                  */
/****************************************************************************************************/

* Begin with World Bank data; clean and inspect the data
    use "$directory/gdp/Processed/wdi", clear 
	/* make the panel rectangular so that [_n-1] within a country is always the previous year */
	fillin cty year
	rename _fillin _fillin_wdi
	
/* keep track of where the GDP data eventually comes from */
    gen source = "wdi" if !missing(gdp)

* Merge in OECD
	merge cty year using "$directory/gdp/Processed/oecd", sort unique
	rename _merge _merge_oecd
	/* use the full variable name rather than relying on abbreviation of _merge */
	tab cty _merge_oecd
	fillin cty year
	rename _fillin _fillin_oecd

	/* Step 1: where GDP is missing but the previous year is not, grow last year's GDP */
	/* by the OECD growth rate. replace works row by row, so the fill cascades.        */
	sort cty year
	by cty: replace gdp=gdp[_n-1]*gdp_oecd/gdp_oecd[_n-1] if gdp==.

	/* Step 2: the same going backward in time */
	gsort cty -year
	by cty: replace gdp=gdp[_n-1]*gdp_oecd/gdp_oecd[_n-1] if gdp==.

	/* Step 3: for what is still missing, scale the OECD level by the ratio of our US GDP */
	/* to the OECD's US GDP in the same year                                             */
	levelsof year, local(years)
	qui foreach y of local years {
    	summ gdp if cty=="USA" & year==`y'
    	local us_gdp=r(mean)
    	summ gdp_oecd if cty=="USA" & year==`y'
    	local us_gdp_oecd=r(mean)
    	replace gdp=gdp_oecd*`us_gdp'/`us_gdp_oecd' if gdp==. & year==`y'
    }

/* keep track of where the GDP data eventually comes from: without this tag, values */
/* filled from the OECD would be attributed to the next source that is merged in    */
    replace source = "oecd" if !missing(gdp) & missing(source)
	
	* Merge in the Penn World Tables and use them to fill GDP that is still missing
    merge cty year using "$directory/gdp/Processed/pwt", sort unique
    rename _merge _merge_pwt
    tabulate cty _merge_pwt
    fillin cty year
    rename _fillin _fillin_pwt

    /* forward in time: grow the previous year's GDP by the PWT growth rate */
    bysort cty (year): replace gdp=gdp[_n-1]*gdp_pwt/gdp_pwt[_n-1] if gdp==.

    /* backward in time: the same with the years in descending order */
    gsort cty -year
    by cty: replace gdp=gdp[_n-1]*gdp_pwt/gdp_pwt[_n-1] if gdp==.

    /* whatever is left: PWT level times (our US GDP / PWT US GDP) in that year */
    levelsof year, local(years)
    quietly foreach y of local years {
        summarize gdp if cty=="USA" & year==`y'
        local us_gdp=r(mean)
        summarize gdp_pwt if cty=="USA" & year==`y'
        local us_gdp_pwt=r(mean)
        replace gdp=gdp_pwt*`us_gdp'/`us_gdp_pwt' if gdp==. & year==`y'
    }

    /* informal checks on 1950 coverage (the asserts are switched off) */
    count if year == 1950
    *assert r(N)>0
    count if year == 1950 & !missing(pop) & !missing(gdp)
    *assert r(N)>0

/* record the source of every GDP value that has no source yet */
    replace source = "pwt" if !missing(gdp) & missing(source)
    tabulate source, missing

* Merge in the IMF data and use them to fill GDP that is still missing
    sort cty year
    merge cty year using "$directory/gdp/Processed/imfNew", sort
    rename _merge _merge_imf
    tabulate cty _merge_imf
    fillin cty year
    rename _fillin _fillin_IMF

    /* forward, then backward, growth-rate fill */
    bysort cty (year): replace gdp=gdp[_n-1]*gdp_imf/gdp_imf[_n-1] if gdp==. 
    gsort cty -year
    by cty: replace gdp=gdp[_n-1]*gdp_imf/gdp_imf[_n-1] if gdp==. 

    /* Level fill relative to the US.                                                 */
    /* NOTE(review): `years' is not recomputed here; it still holds the list built in */
    /* the previous step, so years that only the IMF file adds are not visited.       */
    quietly foreach y of local years {
        summarize gdp if cty=="USA" & year==`y'
        local us_gdp=r(mean)
        summarize gdp_imf if cty=="USA" & year==`y'
        local us_gdp_imf=r(mean)
        replace gdp=gdp_imf*`us_gdp'/`us_gdp_imf' if gdp==. & year==`y'
    }

    /* a second round of growth-rate fills, now that the level fill has added anchors */
    bysort cty (year): replace gdp=gdp[_n-1]*gdp_imf/gdp_imf[_n-1] if gdp==. 
    gsort cty -year
    by cty: replace gdp=gdp[_n-1]*gdp_imf/gdp_imf[_n-1] if gdp==. 

/* record the source of every GDP value that has no source yet */
    replace source = "imf" if !missing(gdp) & missing(source)
    tabulate source, missing

* Merge in BLS GDP data, used to build a series for West Germany (FRG)
    /* note: this sets the data label of the data set currently in memory */
    label data "This is an earlier iteration of the BLS international comparisons data, which included West Germany"
    sort cty year
    merge cty year using "$directory/gdp/Raw/BLS/bls gdp per capita"
    rename _merge _merge_bls
    rename blsgdp gdp_bls
    tabulate cty _merge_bls

    /* copy the US GDP of each year onto every row of that year (tomode is user-written) */
    generate usa_gdp=gdp if cty=="USA"
    tomode usa_gdp, by(year) replace

    /* Create an appropriately-scaled series for FRG: BLS level times (our US GDP / BLS US GDP). */
    /* usa_blsgdp is not created in this file; presumably it comes from the BLS file - confirm.  */
    replace gdp=gdp_bls*usa_gdp/usa_blsgdp if gdp==. & cty=="FRG"
    replace source ="bls" if !missing(gdp) & missing(source)

* Merge in German data: Note DEU=Unified Germany; FRG=West Germany; GDR=East Germany
    sort cty year
    merge cty year using "$directory/gdp/Raw/germany"
    rename _merge _merge_germany

    /* Forward-cast the FRG series with the growth of german_gdp */
    bysort cty (year): replace gdp=gdp[_n-1]*german_gdp/german_gdp[_n-1] if gdp==. & cty=="FRG"
    /* Back-cast the FRG series the same way, with the years in descending order */
    gsort cty -year
    by cty: replace gdp=gdp[_n-1]*german_gdp/german_gdp[_n-1] if gdp==. & cty=="FRG"

    /* Put the FRG and GDR values of german_gdp, and our FRG GDP, on every row of */
    /* the same year (tomode is user-written), so they can be combined row-wise   */
    generate germany_frg=german_gdp if cty=="FRG"
    generate germany_gdr=german_gdp if cty=="GDR"
    generate gdp_frg=gdp if cty=="FRG"
    foreach v of varlist germany_frg germany_gdr gdp_frg {
        tomode `v', by(year) replace
    }

    /* East German GDP = West German GDP scaled by the east/west ratio in german_gdp */
    replace gdp=gdp_frg*germany_gdr/germany_frg if cty=="GDR"
    replace gdp_frg=. if cty!="GDR" & cty!="FRG"
    replace source ="germany, old" if (cty == "GDR" | cty == "FRG") & year<2007

* Merge in German inflation / unemployment data
    merge cty year using "$directory/gdp/Processed/germanyUnemploymentInflation", sort unique
    rename _merge _merge_germany_i_u

/****************************************************************/
/* Fill in the 2007-2009 German data                            */
/* Data are from http://www.vgrdl.de/Arbeitskreis_VGR/home.asp  */
/*                                                              */
/* Data represent real growth in pc GDP, 2000=100               */
/* East Germany                                                 */
/* 2006 105.8                                                   */
/* 2007 108.2                                                   */
/* 2008 109.4                                                   */
/* 2009 106.2                                                   */
/*                                                              */
/* West Germany                                                 */
/* 2006 106.3                                                   */
/* 2007 109.0                                                   */
/* 2008 110.4                                                   */
/* 2009 104.5                                                   */
/****************************************************************/

/* Make the panel rectangular and sort it, so that gdp[_n-1] on a 2007-2009 row */
/* is the same country's value for the year before.                             */
fillin cty year
drop _fillin
sort cty year 

/* Each year's GDP is the previous year's GDP times the ratio of the index values above. */
/* East Germany (the 2008 ratio uses the 2007 index value 108.2 from the table; it was   */
/* previously mistyped as 108.1)                                                         */
replace gdp = gdp[_n-1]*(108.2/105.8) if cty == "GDR" & year == 2007
replace gdp = gdp[_n-1]*(109.4/108.2) if cty == "GDR" & year == 2008
replace gdp = gdp[_n-1]*(106.2/109.4) if cty == "GDR" & year == 2009

/* West Germany */
replace gdp = gdp[_n-1]*(109.0/106.3) if cty == "FRG" & year == 2007
replace gdp = gdp[_n-1]*(110.4/109.0) if cty == "FRG" & year == 2008
replace gdp = gdp[_n-1]*(104.5/110.4) if cty == "FRG" & year == 2009

replace source ="germany, new" if (cty == "GDR" | cty == "FRG") & year>=2007

* Merge in Angus Maddison's data and use them to fill GDP that is still missing
merge cty year using "$directory/gdp/Processed/maddison", sort
rename _merge _merge_maddison
tabulate cty _merge_maddison
fillin cty year
rename _fillin _fillin_maddison

/* forward, then backward, growth-rate fill */
bysort cty (year): replace gdp=gdp[_n-1]*gdp_maddison/gdp_maddison[_n-1] if missing(gdp)
gsort cty -year
by cty: replace gdp=gdp[_n-1]*gdp_maddison/gdp_maddison[_n-1] if missing(gdp)

/* Level fill relative to the US.                                                */
/* NOTE(review): `years' is the list built in an earlier step, so years that are */
/* first introduced by the Maddison file are not visited by this loop.           */
quietly foreach y of local years {
    summarize gdp if cty=="USA" & year==`y'
    local us_gdp=r(mean)
    summarize gdp_maddison if cty=="USA" & year==`y'
    local us_gdp_mad = r(mean)
    replace gdp=gdp_maddison*`us_gdp'/`us_gdp_mad' if missing(gdp) & year==`y'
}

/* record the source of every GDP value that has no source yet */
replace source = "maddison" if !missing(gdp) & missing(source)

* Merge in CIA factbook data 2004-2012 and use them to fill GDP that is still missing
sort cty year
merge cty year using "$directory/gdp/Processed/ciaAll"
rename _merge _merge_cia
fillin cty year
rename _fillin _fillin_cia
tab cty _merge_cia

/* forward, then backward, growth-rate fill */
sort cty year
by cty: replace gdp=gdp[_n-1]*gdp_cia/gdp_cia[_n-1] if gdp==.
gsort cty -year
by cty: replace gdp=gdp[_n-1]*gdp_cia/gdp_cia[_n-1] if gdp==.

/* Level fill: CIA level times (our US GDP / CIA US GDP) in the same year.        */
/* The divisor must be the CIA's US value for year `y'. It was previously the     */
/* local us_gdp_imf, i.e. the IMF's US value left over from the last iteration of */
/* the IMF loop, which scaled every CIA-based level by the wrong number.          */
qui foreach y of local years {
	summ gdp if cty=="USA" & year==`y'
	local us_gdp=r(mean)
	summ gdp_cia if cty=="USA" & year==`y'
	local us_gdp_cia=r(mean)
	replace gdp=gdp_cia*`us_gdp'/`us_gdp_cia' if gdp==. & year==`y'
}

/* record the source of every GDP value that has no source yet */
replace source = "cia" if !missing(gdp) & missing(source)

* Fill in 2004 for IRQ CUB PLW PRI PRK SCG by linear interpolation of log GDP on year
* (ipolate is called without the epolate option, so presumably only years lying between
* two observed years can be filled - confirm against the ipolate documentation)
gen lgdp=ln(gdp)
/* "foreach ... in" is the documented form for a literal list of words */
foreach c in IRQ CUB PLW PRI PRK SCG {
	ipolate lgdp year if cty=="`c'", gen(lgdp_ipolate)
	replace lgdp=lgdp_ipolate if cty=="`c'" & year==2004 & lgdp==.
	/* Only write back the value that was just interpolated. Replacing gdp with  */
	/* exp(lgdp) on every row would pass all existing GDP values through a float */
	/* log and back, rounding them for no reason.                                */
	replace gdp=exp(lgdp) if cty=="`c'" & year==2004 & missing(gdp)
	drop lgdp_ipolate
}
replace source = "extrapolation" if !missing(gdp) & missing(source)
tab source, missing

/********************************************************/
/* Fill in inflation and unemployment. The WDI values   */
/* come first; gaps are then filled from PWT, IMF, ILO  */
/* and the German file, in that order.                  */
/********************************************************/

/* bring in ILO data */
merge cty year using "$directory/gdp/Processed/ilo", sort unique
tabulate _merge
drop _merge

/************************************************************/
/* At this point the PWT price level is expressed relative  */
/* to the US price level, so the US value is constant (the  */
/* assert below checks that it equals 1). The code below    */
/* pegs the US price level in the PWT to US inflation from  */
/* the WDI, rescales all other countries by the resulting   */
/* US level, and derives a PWT inflation variable from the  */
/* rescaled price levels.                                   */
/************************************************************/

    /*********************************************************************************************/
    /* I fill in the US price level by moving it forward according to WDI inflation.             */
    /* The replace below runs row by row over the sorted US rows, so each year builds on the     */
    /* value already computed for the year before.                                               */
    /*********************************************************************************************/
    sort cty year 
    assert priceLevel_pwt == 1 if cty == "USA" & inrange(year, 1960, 2007)
    replace priceLevel_pwt = priceLevel_pwt[_n-1]*(1+inflation) if cty == "USA" & year>=1961
    assert !missing(priceLevel_pwt) if cty == "USA" & inrange(year, 1961, 2009)

    /* multiply every other country's relative price level by the US level of that year */
    levelsof year, local(years)
    foreach yr of local years {
        summarize priceLevel_pwt if cty == "USA" & year == `yr'
        replace priceLevel_pwt = priceLevel_pwt*(r(mean)) if cty != "USA" & year==`yr'
    }

    /* PWT inflation is the year-on-year change in the rescaled price level */
    bysort cty (year): generate inflation_pwt = (priceLevel_pwt/priceLevel_pwt[_n-1]-1)

    /* the PWT has no unemployment series; the empty variable keeps the loop below uniform */
    generate unemployment_pwt = .
    foreach var in inflation unemployment {
        foreach src in pwt imf ilo germany {
            replace `var' = `var'_`src' if missing(`var')
        }
    }
    
    

/************************************************************/
/* Fixing Serbia and Montenegro (SCG)                       */
/* Serbia (SRB) and Montenegro (MNE) are separate countries */
/* in my data, but not in the WVS. For every year from 1990 */
/* to 2009 in which SCG has no GDP, GDP and inflation are   */
/* set to the population-weighted average of the two        */
/* countries' values.                                       */
/************************************************************/

    fillin cty year
    drop _fillin
    forvalues yr = 1990/2009 {
        local scg   cty == "SCG" & year == `yr'
        local parts (cty=="SRB" | cty == "MNE") & year==`yr'

        count if !missing(gdp) & `scg'
        if r(N)==0 {
            foreach var of varlist gdp inflation {
                summarize `var' if `parts' [aw=pop]
                replace `var' = r(mean) if `scg' & missing(`var')
            }
            replace source = "Averaged" if `scg'
            /* NOTE(review): r(sum_w) is left over from the last summarize above, i.e. */
            /* the one on inflation, so the population written here only covers rows   */
            /* that entered that summarize - confirm this is the intended total.       */
            replace pop = r(sum_w) if `scg'
        }
    }

/********************************************************************/
/* Hand-fill some missing unemployment numbers,                     */
/* using data largely from past CIA fact books (except where noted) */
/* Values are fractions; only cells that are still missing change.  */
/********************************************************************/
    /* 1990s */
    replace unemployment = .28  if missing(unemployment) & cty == "NGA" & year == 1990
    replace unemployment = .28  if missing(unemployment) & cty == "NGA" & year == 1995
    replace unemployment = .375 if missing(unemployment) & cty == "BIH" & year == 1998
    /* 2000-2003 */
    replace unemployment = .14  if missing(unemployment) & cty == "IRN" & year == 2000
    replace unemployment = .131 if missing(unemployment) & cty == "NGA" & year == 2000 /* From NIGERIA stats website */
    replace unemployment = .40  if missing(unemployment) & cty == "BIH" & year == 2001
    replace unemployment = .063 if missing(unemployment) & cty == "PAK" & year == 2001
    replace unemployment = .046 if missing(unemployment) & cty == "UGA" & year == 2001
    replace unemployment = .60  if missing(unemployment) & cty == "ZWE" & year == 2001
    replace unemployment = .40  if missing(unemployment) & cty == "BGD" & year == 2002
    replace unemployment = .13  if missing(unemployment) & cty == "SAU" & year == 2003
    /* 2005-2006 */
    replace unemployment = 0    if missing(unemployment) & cty == "AND" & year == 2005
    replace unemployment = .032 if missing(unemployment) & cty == "GTM" & year == 2005
    replace unemployment = .314 if missing(unemployment) & cty == "SCG" & year == 2006
    replace unemployment = .02  if missing(unemployment) & cty == "VNM" & year == 2006
    /* 2007 */
    replace unemployment = .77  if missing(unemployment) & cty == "BFA" & year == 2007
    replace unemployment = .11  if missing(unemployment) & cty == "GHA" & year == 2007
    replace unemployment = .30  if missing(unemployment) & cty == "JOR" & year == 2007
    replace unemployment = .30  if missing(unemployment) & cty == "MLI" & year == 2007
    replace unemployment = .50  if missing(unemployment) & cty == "ZMB" & year == 2007
    /* the one hand-filled inflation number */
    replace inflation = .032 if missing(inflation) & cty == "AND" & year == 2005

    /* look at the results for Serbia, Montenegro and their union */
    sort year cty
    list year gdp source country if (cty == "MNE" | cty == "SCG" | cty == "SRB") & year>=1990, noobs sep(0)

    /* log GDP for the rows that received a GDP value after lgdp was first created */
    replace lgdp = ln(gdp) if missing(lgdp)


/***********************************************************/
/* Look at final results: coverage, then growth by source  */
/***********************************************************/
    /* how many country-years fall in 1960-2010, and how many of them lack GDP */
    count if inrange(year, 1960, 2010)
    count if missing(gdp) & inrange(year, 1960, 2010)

    /* declare the panel (a numeric country id is required) and compute annual growth */
    egen ctyid = group(cty)
    tsset ctyid year
    generate growth = D.gdp/L.gdp

    /* mean GDP, mean growth and number of observations for each source */
    table source, contents(mean gdp mean growth freq) row

/*************************************/
/* Correlation across sources of GDP */
/*************************************/

/* Log GDP from each source, stored as l<source> (lwdi, loecd, ...) */
local sources wdi oecd pwt imf cia maddison
foreach src of local sources {
    gen l`src' = ln(gdp_`src')
}

/* For every ordered pair of distinct sources (y, x):                  */
/*   - correlate the two log series on their common sample,            */
/*   - regress y on x and keep the fitted values,                      */
/*   - draw a scatter with the fitted line, stored as graph c_<y>_<x>. */
quietly foreach y of local sources {
    foreach x of local sources {
        /* nothing to compare a source against itself */
        if "`y'" == "`x'" {
            continue
        }

        /* observations where both sources report GDP */
        gen sample = !missing(l`y') & !missing(l`x')

        correlate l`y' l`x' if sample
        local rho = string(r(rho), "%4.2f")

        regress l`y' l`x'
        predict lgdp_fit, xb
        local cons = string(_b[_cons], "%4.2f")
        local beta = string(_b[l`x'], "%4.2f")
        local se   = string(_se[l`x'], "%4.2f")

        twoway ///
            (scatter l`y' l`x' if sample, msymbol(circle) mcolor(navy) mlabcolor(navy)) ///
            (line lgdp_fit l`x' if sample, color(black)), ///
            title("Comparing log GDP, `y' vs `x'", size(small)) ///
            note("`y' = `cons' +`beta'*(`x') [se=`se']" ///
                 "Correlation = `rho'", ring(0) pos(5) size(small)) ///
            xtitle("Ln(GDP), `x'", size(small)) ///
            ytitle("Ln(GDP), `y'", size(small)) ///
            legend(off) ///
            name(c_`y'_`x', replace)

        /* exporting the individual figures is currently switched off */
        * graph export "$figdir/compare_`y'_`x'.ps", replace
        * graph save "$figdir/compare_`y'_`x'.gph", replace

        /* clear the scratch variables before the next pair */
        drop lgdp_fit sample
    }
}
/* Put the 15 unordered-pair comparison graphs into one figure, three per row */
graph combine ///
    c_wdi_oecd  c_wdi_pwt   c_wdi_imf   c_wdi_cia   c_wdi_maddison  ///
                c_oecd_pwt  c_oecd_imf  c_oecd_cia  c_oecd_maddison ///
                            c_pwt_imf   c_pwt_cia   c_pwt_maddison  ///
                                        c_imf_cia   c_imf_maddison  ///
                                                    c_cia_maddison, ///
    cols(3) imargin(tiny)

    
/**************************************
* Generate output gaps using HP filter, with smoothing parameter set to 6.25
**************************************/
    /* Install hprescott from SSC only when it is not already available.      */
    /* An unconditional -ssc install- needs network access on every run and   */
    /* aborts if the installed copy differs from the one currently on SSC.    */
    capture which hprescott
    if _rc {
        ssc install hprescott
    }

    /* lgdp_hp holds the smoothed log GDP series; filled country by country */
    gen lgdp_hp = .
    levelsof cty, local(ctys)
    foreach c of local ctys {
        display "`c'"

        /* number of non-missing log GDP observations for this country */
        qui count if cty == "`c'" & !missing(lgdp)
        local count = r(N)

        /* only filter countries with a gap-free series of at least 10 observations */
        qui tsreport if cty == "`c'" & !missing(lgdp)
        if (r(N_gaps) == 0 & `count' >= 10) {
            /* hprescott writes its output to new variables prefixed with the stub; */
            /* <stub>_lgdp_sm is taken here as the smoothed series                  */
            hprescott lgdp if cty == "`c'", smooth(6.25) stub("`c'")
            replace lgdp_hp = `c'_lgdp_sm if cty == "`c'"
            drop `c'_lgdp*
        }
    }

    /* output gap = log GDP minus its smoothed counterpart */
    gen gap = lgdp - lgdp_hp

    /* report country-years that have GDP but no filtered value (listed if fewer than 100) */
    count if missing(lgdp_hp) & !missing(gdp)
    if r(N) < 100 {
        list cty year gdp if missing(lgdp_hp) & !missing(gdp), noobs sep(0)
    }

/******************************************************/
/* We can do better with the former USSR countries.   */
/* The trouble is that we have one observation of GDP */
/* from 1973 for each of them, from Angus Maddison,   */
/* but then nothing again until 1990, when most of    */
/* them became countries, so there is a gap in the    */
/* sample. So I just use post-1990 data for these.    */
/******************************************************/
    local post1990 ARM AZE BLR EST KGZ LTU MDA RUS UKR LVA GEO KAZ PSE TJK TKM UZB
    foreach code of local post1990 {
        display "`code'"
        /* filter only the 1990-onward stretch of the series */
        hprescott lgdp if cty == "`code'" & year >= 1990, smooth(6.25) stub("`code'")
        /* copy the smoothed series into lgdp_hp for every row of this country */
        replace lgdp_hp = `code'_lgdp_sm if cty == "`code'"
        drop `code'_lgdp*
    }

    /* refresh the gap now that more countries have a filtered series */
    replace gap = lgdp - lgdp_hp
    
    
/*******************************/
/* Drop estimates from the IMF */
/* Nah, let's keep 'em, instead*/
/*******************************/
/*    
    replace gdp = . if year>estimatesstartafter & source == "imf"  ///
        & !(year == 2010 & (cty == "BRA" | cty == "KOR" | cty == "LBN" | cty == "NGA" | cty == "RUS" | cty == "TUR")) 
    
    /* keep IMF estimates for 6 pew countries for 2010 */
 sort cty year
 list cty year gdp source gdp_wdi gdp_pwt if cty=="ZWE"
*/

/********************************************/
/* Look at the distribution of inflation    */
/********************************************/
    summarize inflation if inrange(year, 1980, 2010), detail

    /* Follow Barro (1996) coding, and make piecewise linear:        */
    /* each term is the excess of inflation over its threshold       */
    /* (0.15 and 0.40), and zero when inflation is below it          */
    gen inflation_med  = (inflation >= 0.15) * (inflation - .15)
    gen inflation_high = (inflation >= .40) * (inflation - 0.40)

/****************************************************************************************/
/* Make OECD interactions, and also missing indicators for inflation and unemployment   */
/****************************************************************************************/
    /* flag missing values first, before they are overwritten below */
    foreach v in inflation unemployment {
        gen missing_`v' = missing(`v')
    }

    /* recode missings: -1 for the level of inflation, 0 for the spline terms */
    replace inflation = -1 if missing(inflation)
    foreach v in inflation_med inflation_high {
        replace `v' = 0 if missing(`v')
    }
    replace unemployment = -1 if missing(unemployment)

    /* OECD membership dummy from a fixed list of country codes */
    gen oecd = 0
    foreach member in AUT BEL CAN DNK FRA DEU GRC ISL IRL ITA LUX NLD NOR PRT ESP SWE CHE TUR GBR USA JPN FIN AUS NZL MEX CZE KOR HUN POL SVK {
        replace oecd = 1 if cty == "`member'"
    }

    /* unemployment split by OECD status, plus a missing flag for OECD members */
    gen oecdUnemployment = oecd * unemployment
    gen nonOecdUnemployment = (1 - oecd) * unemployment
    gen missingOecdUnemployment = missing_unemployment * oecd

/********************/
/* make lags of GDP */
/********************/
    /* Lags are taken in calendar time with the L#. operator, not by row     */
    /* offset. With lgdp[_n-#], any country that has missing year rows would */
    /* get a "lag" from the wrong year; L#. returns missing in that case.    */
    /* tsset is re-issued so the data are sorted as the operators need.      */
    tsset ctyid year
    forvalues l = 1/20 {
        gen lgdp_lag`l' = L`l'.lgdp
    }

    /* restore the country/year ordering used by the rest of this section */
    sort cty year

    /* For each decade year 1950, 1960, ..., 2010: take log GDP in that year */
    /* and pass it to tomode with by(cty), replacing the variable in place   */
    /* NOTE(review): tomode is presumably meant to spread that one value     */
    /* across all rows of the country -- confirm against its help file       */
    forvalues y = 1950(10)2010 {
        gen lgdp_`y' = lgdp if year == `y'
        tomode lgdp_`y', by(cty) replace
    }
	
/*****************/
/* Sort and save */
/*****************/
    /* order rows by country, then year */
    sort cty year
    /* shrink storage types where possible before writing to disk */
    compress
    /* the label below is exactly 80 characters long; do not lengthen it */
    label data "GDP data compiled from World Bank, OECD Penn World Tables, IMF, Maddison and CIA"
    save "$directory/gdp/Complete_GDP", replace



